####################################
#Diff-in-diff local linear regression
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
####################################

########################
#Initial setup 
########################
#set wd
setwd("./Dropbox/Collaborations/Traffic/project_analysis")

#external libraries 
library(ggplot2)

#load in the dd_with_dd function 
source("dd_with_matchBoot_function.R")

##############################
#set variable values
##############################

#load in data 
NJ_data <- read.csv("NJ_precinct_master.csv", header=T)
#PA_data <- read.csv("PA_precinct_master.csv", header=T)

#set general specifications for later modifications
my_data <- PA_data 

#outcomes 
outcome_time1_name <- "USPP2000"
outcome_time2_name <- "USPP2004"  #US president (dem) percent 2000, 2004

#identify matching variables 
#matching_variables <- c("cepctwhite", "cepctblack", "cepctwomen", "cnipoverty", "cnpopurban") #income, commute length, number of cars, etc.
matching_variables <- c("census_purban", "census_pblack", "census_punemployed") #"census_pfam_belowpoverty", "census_pworkers_Commute_less9") #income, commute length, number of cars, etc. 

#treated basis (something having to do with E-ZPasses)
treatment_basis_name <- "EZDist_PA_NEAR_DIST" 

#control basis (something having to do with non-E-ZPass highway)
control_basis_name <- c("minDist_nonEZ_hwy")

par(mfrow = c(2, 1))
test <- loess(USPP2004-USPP2000 ~ EZDist_PA_NEAR_DIST, data=my_data)
summary(test)
plot(test, xlim=c(0,200000))

test <- loess(USPP2004-USPP2000~minDist_nonEZ_hwy, data=my_data)
summary(test)
plot(test, xlim=c(0,200000))

test <- lm(USPP2004-USPP2000~  EZDist_PA_NEAR_DIST + #EZPlaza_TravelDistOptimizedDist_PA_Total_Time
             minDist_nonEZ_hwy + 
             census_pblack + 
             census_purban + census_200over, data=my_data)
summary(test)

##############################
#loop to generate figure 
##############################
max_length_out <- 10; max_seq <- seq(120, 2500, length.out = max_length_out)
min_length_out <- 2; min_seq <- seq(1000, 38000, length.out = min_length_out)
BIG_STORAGE_LIST <- list()
counter1 <- 0 
for(j in min_seq)
{ 
  counter1 <- counter1 + 1 
  results_list <- list(); counter2 <- 0; for(i in max_seq)
  { 
    counter2 <- counter2 + 1
    print(counter2)
    results_list[[counter2]] <- dd_with_dd(my_data=my_data,
                                           treatment_basis_name=treatment_basis_name, 
                                           control_basis_name=control_basis_name, 
                                           outcome_time1_name=outcome_time1_name, 
                                           outcome_time2_name=outcome_time2_name,
                                           matching_variables=matching_variables,
                                           treatment_basis_treated_max=i, 
                                           treatment_basis_control_min=j, 
                                           control_basis_control_max=i,
                                           control_basis_treated_min=1,
                                           boostrap_SDs=F, 
                                           bootstrap_numb=1, 
                                           conf_level=0.95, 
                                           my_method = "nearest", 
                                           my_replace=T, 
                                           start_browser=F)
  }
  BIG_STORAGE_LIST[[counter1]] <- results_list
}

####################
#save and plot results
####################
#save results as appropriate objects 
results_list <- unlist(BIG_STORAGE_LIST, recursive=F, use.names=T)
index_seq <- seq(1, max_length_out*min_length_out)
for(index in 1:min_length_out)
{
  start <- max_length_out*(index-1)+1
  end <- max_length_out*(index)
  
  internal_list <- results_list[start:end]
  
  unlisted_results <- unlist(internal_list, recursive=F, use.names=F) 
  base_results <- unlist(unlisted_results[(1:length(unlisted_results))%%4==1])
  upper <- unlist(unlisted_results[(1:length(unlisted_results))%%4==2])
  lower <- unlist(unlisted_results[(1:length(unlisted_results))%%4==3])
  match_numbs <- unlist(unlisted_results[(1:length(unlisted_results))%%4==0])
  
  gg_data <- data.frame(cbind(max_seq, base_results))
  
  g <- ggplot(gg_data, aes(x=max_seq, y=base_results)) + 
    geom_point() + 
    geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
    geom_hline(yintercept = 0, line_type="dashed") + 
    geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
    ylab("DiD effect (in % points)") + 
    xlab("Maximum distance from E-ZPass or Control Highway") + 
    ggtitle("Maximum Distance vs. DiD Effect (in % points)") + 
    theme(text = element_text(size=13)) 
  ggsave(g, file=sprintf("nearest_match_iteration%i.png", index))
}


#setup for ggplot 2 
gg_data <- data.frame(cbind(max_seq, base_results))

g1 <- ggplot(gg_data, aes(x=max_seq, y=base_results)) + 
  geom_point() + 
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
  geom_hline(yintercept = 0, line_type="dashed") + 
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
  ylab("DiD effect (in % points)") + 
  xlab("Maximum distance from E-ZPass or Control Highway") + 
  ggtitle("Maximum Distance vs. DiD Effect (in % points)") + 
  theme(text = element_text(size=13)) 
g1

#save .png to directory.
ggsave(g1, file="PA_figure_dd_with_dd.R")
